/**
 * @(#)CSVReader.java
 *
 * Copyright (c) 2007 sinba. All Rights Reserved.
 */
package com.sinba.common.file.csv;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/**
 * A very simple CSV reader released under a commercial-friendly license.
 *
 * <p>Input is consumed line by line from the wrapped reader. Each record is
 * split on the separator character; a field wrapped in the quote character may
 * contain the separator, doubled quote characters (read as one literal quote),
 * and line breaks (the record then continues on the following line).
 *
 * <p>The reader implements {@link Closeable}, so it can be handed to any code
 * that manages closeable resources.
 *
 * @version 1.00 2008-09-25
 * @author <a href="mailto:sinbawang@msn.com">Sinba Wang</a>
 */
public class CSVReader implements Closeable
{
    /** The default separator to use if none is supplied to the constructor. */
    public static final char DEFAULT_SEPARATOR = ',';

    /**
     * The default quote character to use if none is supplied to the constructor.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The default number of leading lines to skip before the first record is
     * read. With the value 1 the first line of the input is discarded unless
     * the four-argument constructor is used with a different count.
     */
    public static final int DEFAULT_SKIP_LINES = 1;

    /** Buffered view of the underlying source; closed by {@link #close()}. */
    private final BufferedReader br;

    /** Becomes false once the underlying reader has reported end of input. */
    private boolean hasNext = true;

    /** Character that separates fields. */
    private final char separator;

    /** Character that wraps quoted fields. */
    private final char quotechar;

    /** Number of leading lines discarded before the first record. */
    private final int skipLines;

    /** True once the leading lines have been discarded (done lazily on first read). */
    private boolean linesSkipped;

    /**
     * Constructs CSVReader using a comma for the separator.
     *
     * @param reader the reader to an underlying CSV source
     */
    public CSVReader( Reader reader )
    {
        this( reader, DEFAULT_SEPARATOR );
    }

    /**
     * Constructs CSVReader with supplied separator.
     *
     * @param reader the reader to an underlying CSV source
     * @param separator the delimiter to use for separating entries
     */
    public CSVReader( Reader reader, char separator )
    {
        this( reader, separator, DEFAULT_QUOTE_CHARACTER );
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader the reader to an underlying CSV source
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     */
    public CSVReader( Reader reader, char separator, char quotechar )
    {
        this( reader, separator, quotechar, DEFAULT_SKIP_LINES );
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and number of
     * leading lines to skip.
     *
     * @param reader the reader to an underlying CSV source
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param line the number of lines to discard before the first record is
     *        read; zero or a negative value skips nothing
     */
    public CSVReader( Reader reader, char separator, char quotechar, int line )
    {
        this.br = new BufferedReader( reader );
        this.separator = separator;
        this.quotechar = quotechar;
        this.skipLines = line;
    }

    /**
     * Reads all remaining records into a List, each element being a String[] of tokens.
     *
     * @return a List whose elements are String[] instances, one per record
     *         (the element type is declared as Object for backward compatibility)
     *
     * @throws IOException if bad things happen during the read
     */
    public List<Object> readAll() throws IOException
    {
        List<Object> allElements = new ArrayList<Object>();

        while ( hasNext )
        {
            String[] nextLineAsTokens = readNext();

            if ( nextLineAsTokens != null )
            {
                allElements.add( nextLineAsTokens );
            }
        }

        return allElements;
    }

    /**
     * Reads the next record from the buffer and converts it to a string array.
     *
     * @return a string array with each separated element as a separate entry,
     *         or null once the end of the input has been reached
     *
     * @throws IOException if bad things happen during the read
     */
    public String[] readNext() throws IOException
    {
        String nextLine = getNextLine();

        return hasNext ? parseLine( nextLine ) : null;
    }

    /**
     * Reads the next line from the file, discarding the configured number of
     * leading lines the first time it is called.
     *
     * @return the next line from the file without trailing newline, or null at
     *         end of input
     * @throws IOException if bad things happen during the read
     */
    private String getNextLine() throws IOException
    {
        if ( !linesSkipped )
        {
            for ( int i = 0; i < skipLines; i++ )
            {
                br.readLine();
            }

            linesSkipped = true;
        }

        String nextLine = br.readLine();

        if ( nextLine == null )
        {
            hasNext = false;
        }

        return hasNext ? nextLine : null;
    }

    /**
     * Parses an incoming String and returns an array of elements. When a quoted
     * field is still open at the end of the line, further lines are pulled from
     * the reader and joined with a newline until the quote closes or the input
     * ends.
     *
     * @param nextLine the string to parse
     * @return the tokenized list of elements, or null if nextLine is null
     * @throws IOException if bad things happen during the read
     */
    private String[] parseLine( String nextLine ) throws IOException
    {
        if ( nextLine == null )
        {
            return null;
        }

        List<String> tokensOnThisLine = new ArrayList<String>();
        StringBuilder sb = new StringBuilder();
        boolean inQuotes = false;

        do
        {
            if ( inQuotes )
            {
                // continuing a quoted section, reappend newline
                sb.append( '\n' );
                nextLine = getNextLine();

                if ( nextLine == null )
                {
                    // input ended inside a quoted field: return what we have
                    break;
                }
            }

            for ( int i = 0; i < nextLine.length(); i++ )
            {
                char c = nextLine.charAt( i );

                if ( c == quotechar )
                {
                    if ( inQuotes && isFollowedByQuote( nextLine, i ) )
                    {
                        // two quote chars in a row inside quotes == one literal quote:
                        // consume both, emit one, and stay inside the quoted text.
                        sb.append( quotechar );
                        i++;
                    }
                    else
                    {
                        inQuotes = !inQuotes;

                        // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                        if ( isEmbeddedQuote( nextLine, i ) )
                        {
                            sb.append( c );
                        }
                    }
                }
                else if ( ( c == separator ) && !inQuotes )
                {
                    tokensOnThisLine.add( sb.toString() );
                    sb.setLength( 0 ); // start work on next token
                }
                else
                {
                    sb.append( c );
                }
            }
        }
        while ( inQuotes );

        tokensOnThisLine.add( sb.toString() );

        return tokensOnThisLine.toArray( new String[0] );
    }

    /**
     * Tells whether the character right after position {@code i} exists and is
     * the quote character (1-char lookahead used to detect a doubled quote).
     */
    private boolean isFollowedByQuote( String line, int i )
    {
        return ( line.length() > ( i + 1 ) ) && ( line.charAt( i + 1 ) == quotechar );
    }

    /**
     * Tells whether the quote at position {@code i} sits in the middle of a
     * token and must therefore be kept as a literal character: it is neither
     * directly after nor directly before a separator, and it is not the last
     * character of the line.
     *
     * <p>NOTE: quotes at index 0, 1 or 2 of a line are never treated as
     * embedded (the {@code i > 2} test); this threshold is kept as is so
     * parsing results do not change.
     */
    private boolean isEmbeddedQuote( String line, int i )
    {
        return ( i > 2 )
            && ( line.charAt( i - 1 ) != separator )
            && ( line.length() > ( i + 1 ) )
            && ( line.charAt( i + 1 ) != separator );
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the close fails
     */
    @Override
    public void close() throws IOException
    {
        br.close();
    }
}
